This geospatial model uses routinely collected malaria case data, population data and remotely sensed data, such as open and vegetated water bodies, to estimate population living around open water bodies, and ultimately quantify the association between proximity to larval habitat and malaria risk in health facility catchment areas in Kasungu. The buffer layer around waterbodies are created and then combined with population data in raster format to estimate the total population that is within the buffer. The data used spans from 2017 to 2020 and was derived from digitized DHIS2 malaria records, aggregated population geospatial layer and TropWet tool in Google Earth Engine.
Loading the R packages that will be used to read in, view, transform and model the malaria cases and spatial datasets.
library(spatialEco)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(plotly)
library(lubridate)
library(knitr)
library(raster)
library(rgdal)
library(rgeos)
library(sf)
library(sp)
library(tmap)
library(spdep)
library(maptools)
library(gridExtra)
library(grid)
library(exactextractr)
library(DataExplorer)
library(mapview)
`%>%` <- magrittr::`%>%`
here::here()
## [1] "C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021"
The total dry season malaria cases recorded at health-care facilities in Kasungu from 2017 to 2019 are contained in the KasunguData.csv sourced from https://dhis2.health.gov.mw/. The kasungu_facility_catchments_2004.shp shapefile also contains the population and health information within each health-facility catchment area in Kasungu district. The aggregated population raster layers for Malawi e.g.,ku_pop_2017_1km_aggregated.tif were downloaded from the Open Spatial and Demographic and Data Research website: https://www.worldpop.org/geodata/country?iso3=MWI. These layers estimate total number of people per grid-cell. The units are number of people per pixel with country totals adjusted to match the corresponding official United Nations population estimates. The datasets were downloaded in Geotiff at a resolution of 1km and are projected in Geographic Coordinate System, WGS84. The kasungu_water.shpand water_bodies layers contain open and vegetated waterbodies polygons, detected using the Tropical Wetland Unmixing Tool (TropWet). TropWet is a Google Earth Engine hosted toolbox that uses the Landsat archive to map tropical wetlands and can be accessed through: https://www.aber.ac.uk/en/dges/research/earth-observation-laboratory/research/tropwet/
# 2017, 2018 and 2019 dry season malaria cases
dry_season_malaria_2015_2019 <- read.csv(here::here("data", "dry_season_malaria 2015-2019.csv"))
# Explore malaria data
# dry_season_malaria_2015_2019 %>% dplyr::glimpse() %>%
# DataExplorer::introduce() %>%
# DataExplorer::plot_missing()
# Export 2017, 2018 and 2019 dry season malaria cases
write.csv(dry_season_malaria_2015_2019, file = "data/dry_season_malaria_2017_2019.csv")
write.table(dry_season_malaria_2015_2019, file = "dry_season_malaria_2017_2019.txt",
sep = ",", quote = FALSE, row.names = FALSE)
# 2020 dry season malaria cases
ku_malaria_2020 <- read.csv(here::here("data", "dry_season_malaria_2020.csv"))
ku_malaria_2020 %>% dplyr::glimpse()
## Rows: 36
## Columns: 9
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ~
## $ Names <chr> "Anchor Farm Health Centre", "Bua Health Centre (Kasung~
## $ May.2020 <int> 1172, 2293, 855, NA, 258, 806, 2468, 1327, 1190, 81, 80~
## $ June.2020 <int> 571, 1310, 24, 50, 126, 551, 1756, 66, 1102, 26, 511, N~
## $ July.2020 <int> 438, 634, 143, 37, 242, 314, 1691, 799, 667, 14, 407, N~
## $ August.2020 <int> 184, 482, 122, 137, 259, 146, 1177, 640, 375, 6, 372, N~
## $ September.2020 <int> 214, 565, 115, 117, 225, 202, 1240, 626, 270, 8, 413, N~
## $ October.2020 <int> 258, 910, 231, 201, 276, 392, 1296, 738, 353, 12, 494, ~
## $ dr_2020 <int> 2837, 6194, 1490, 542, 1386, 2411, 9628, 4196, 3957, 14~
# Merge 2015 to 2019 dry season malaria case data with 2020 data
dry_season_malaria_2017_2020 <- cbind.data.frame(dry_season_malaria_2015_2019, ku_malaria_2020)
dry_season_malaria_2017_2020 <- dry_season_malaria_2017_2020[,c("FID", "Names", "dr_2017",
"dr_2018", "dr_2019", "dr_2020",
"LONGITU", "LATITUD")]
# Export 'dry season malaria 2017-2020' as csv
write.csv(dry_season_malaria_2017_2020, file = "data/dry_season_malaria_2017_2019.csv")
dry_season_malaria_2017_2020 %>% dplyr::glimpse()
## Rows: 36
## Columns: 8
## $ FID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,~
## $ Names <chr> "Anchor Farm", "Bua Health Centre", "Chamama Health Facility",~
## $ dr_2017 <int> 2966, 3489, 1878, NA, 3601, 2292, 5801, 2192, 2745, NA, NA, NA~
## $ dr_2018 <int> 3142, 1804, 1750, NA, 4027, 2116, 4502, 2394, 4138, NA, NA, NA~
## $ dr_2019 <int> 1978, 1740, 1508, NA, 1686, 1770, 5470, 2553, 2200, NA, NA, NA~
## $ dr_2020 <int> 2837, 6194, 1490, 542, 1386, 2411, 9628, 4196, 3957, 147, 3003~
## $ LONGITU <dbl> 33.39000, 33.53859, 33.77686, NA, 33.68528, 33.79701, 33.30816~
## $ LATITUD <dbl> -13.41000, -13.29403, -12.92431, NA, -13.10757, -12.97452, -12~
# Kasungu district boundary shapefile
kasungu_district <- st_read(here::here("data", "kasungu_district.shp"))
## Reading layer `kasungu_district' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_district.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 5 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 491272.7 ymin: 8494349 xmax: 609044.2 ymax: 8632164
## Projected CRS: WGS 84 / UTM zone 36S
# Kasungu health facility catchment boundary shapefiles
malire <- sf::st_read(here::here("data", "kasungu_health_facility_catchments.shp")) # from 20ish years ago
## Reading layer `kasungu_health_facility_catchments' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_health_facility_catchments.shp' using driver `ESRI Shapefile'
## Simple feature collection with 34 features and 21 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 491272.8 ymin: 8494349 xmax: 609044.2 ymax: 8632164
## Projected CRS: WGS 84 / UTM zone 36S
malire_new <- sf::st_read(here::here("data", "Kasungu_new_health_fac_catchment_clip.shp")) # generated from accessibility mapping
## Reading layer `Kasungu_new_health_fac_catchment_clip' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\Kasungu_new_health_fac_catchment_clip.shp' using driver `ESRI Shapefile'
## Simple feature collection with 29 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 491878.5 ymin: 8494645 xmax: 609044.2 ymax: 8631908
## Projected CRS: WGS 84 / UTM zone 36S
# malire_new <- spTransform(malire_new, proj4string(malire_new))
# Kasungu population raster layer
kasungu_population_2017 <- raster(here::here("data", "ku_pop_2017_1km_aggregated.tif"))
kasungu_population_2018 <- raster(here::here("data", "ku_pop_2018_1km_aggregated.tif"))
kasungu_population_2019 <- raster(here::here("data", "ku_pop_2019_1km_aggregated.tif"))
kasungu_population_2020 <- raster(here::here("data", "ku_pop_2020_1km_aggregated.tif"))
# Open waterbodies polygons
water_2017 <- st_read(here::here("data", "water_bodies_2017.shp"))
## Reading layer `water_bodies_2017' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\water_bodies_2017.shp' using driver `ESRI Shapefile'
## Simple feature collection with 168 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 514497 ymin: 8495941 xmax: 603149.8 ymax: 8620169
## Projected CRS: WGS 84 / UTM zone 36S
water_2018 <- st_read(here::here("data", "kasungu_2018_water.shp"))
## Reading layer `kasungu_2018_water' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_2018_water.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1105 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 496807.6 ymin: 8494693 xmax: 607913.8 ymax: 8607747
## Projected CRS: WGS 84 / UTM zone 36S
water_2019 <- st_read(here::here("data", "kasungu_2019_water.shp"))
## Reading layer `kasungu_2019_water' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_2019_water.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1941 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 494197.2 ymin: 8494693 xmax: 607913.8 ymax: 8617573
## Projected CRS: WGS 84 / UTM zone 36S
water_2020 <- st_read(here::here("data", "water_bodies_2020.shp"))
## Reading layer `water_bodies_2020' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\water_bodies_2020.shp' using driver `ESRI Shapefile'
## Simple feature collection with 266 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 508985.6 ymin: 8495793 xmax: 585761.1 ymax: 8620169
## Projected CRS: WGS 84 / UTM zone 36S
# Add a field ID to water bodies polygons
water_2017$ID <- 1:nrow(water_2017)
water_2018$ID <- 1:nrow(water_2018)
water_2019$ID <- 1:nrow(water_2019)
water_2020$ID <- 1:nrow(water_2020)
We observe that Kasungu district has 30 health facilities classified as dispensary, health centre, district hospital and rural hospital and the highest malaria cases were recorded at Kasungu District Hospital.
dry_season_malaria_2017_2020 %>%
glimpse() %>%
summary()
## Rows: 36
## Columns: 8
## $ FID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,~
## $ Names <chr> "Anchor Farm", "Bua Health Centre", "Chamama Health Facility",~
## $ dr_2017 <int> 2966, 3489, 1878, NA, 3601, 2292, 5801, 2192, 2745, NA, NA, NA~
## $ dr_2018 <int> 3142, 1804, 1750, NA, 4027, 2116, 4502, 2394, 4138, NA, NA, NA~
## $ dr_2019 <int> 1978, 1740, 1508, NA, 1686, 1770, 5470, 2553, 2200, NA, NA, NA~
## $ dr_2020 <int> 2837, 6194, 1490, 542, 1386, 2411, 9628, 4196, 3957, 147, 3003~
## $ LONGITU <dbl> 33.39000, 33.53859, 33.77686, NA, 33.68528, 33.79701, 33.30816~
## $ LATITUD <dbl> -13.41000, -13.29403, -12.92431, NA, -13.10757, -12.97452, -12~
## FID Names dr_2017 dr_2018
## Min. : 1.00 Length:36 Min. : 427 Min. : 749
## 1st Qu.: 9.75 Class :character 1st Qu.: 2196 1st Qu.: 2424
## Median :18.50 Mode :character Median : 3132 Median : 3357
## Mean :18.50 Mean : 3586 Mean : 3909
## 3rd Qu.:27.25 3rd Qu.: 3993 3rd Qu.: 4684
## Max. :36.00 Max. :16289 Max. :15821
## NA's :6 NA's :6
## dr_2019 dr_2020 LONGITU LATITUD
## Min. : 533 Min. : 0 Min. :33.18 Min. :-13.57
## 1st Qu.: 1748 1st Qu.: 2286 1st Qu.:33.38 1st Qu.:-13.25
## Median : 2556 Median : 3824 Median :33.50 Median :-12.98
## Mean : 2880 Mean : 4657 Mean :33.52 Mean :-12.99
## 3rd Qu.: 3284 3rd Qu.: 6212 3rd Qu.:33.68 3rd Qu.:-12.79
## Max. :10721 Max. :24424 Max. :33.87 Max. :-12.42
## NA's :6 NA's :6 NA's :6
dry_season_malaria_2017_2020 %>%
plotly::plot_ly(y = ~Names,
x = ~dr_2017,
type = "bar",
orientation = 'h',
name = "2017") %>%
plotly::add_trace(x = ~ dr_2018,
name = "2018") %>%
plotly::add_trace(x = ~ dr_2019,
name = "2019") %>%
plotly::add_trace(x = ~ dr_2020,
name = "2020") %>%
plotly::layout(#barmode = "stack",
xaxis = list(title = "Total malaria cases"),
yaxis = list(title = ""),
hovermode = "compare",
margin = list(b = 10,
t = 10,
pad = 2))
Fig.1 The total malaria cases recorded at each health-care facility in Kasungu district
Heath facility catchment area is the area from which a health facility attracts patients. Data provider is National Statistical Office.
# Using the is.na() function to remove health centres with missing longitude and latitude coordinates
# Aggregating Kasalika Health Centre and Kasungu District Hospital, and
# Kaluluma Rural Hospital and Nkhamenya Rural Hospital malaria cases
# health_facility_aggregated <- dry_season_malaria_2017_2020[!is.na(dry_season_malaria_2017_2020$LONGITU),] %>%
# dplyr::filter(Names != "Kasalika Health Centre",
# Names != "Bua Health Centre",
# Names != "Kaluluma Rural Hospital") %>%
# dplyr::mutate(dr_2017 = ifelse(Names == "Kasungu District Hospital", 4528 + 16289, dr_2017),
# dr_2018 = ifelse(Names == "Kasungu District Hospital", 4493 + 15821, dr_2018),
# dr_2019 = ifelse(Names == "Kasungu District Hospital", 2729 + 10721, dr_2018),
# dr_2020 = ifelse(Names == "Kasungu District Hospital", 4368 + 24424, dr_2020),
# dr_2017 = ifelse(Names == "Nkhamenya Rural Hospital", 2887 + 752, dr_2017),
# dr_2018 = ifelse(Names == "Nkhamenya Rural Hospital", 851 + 3689, dr_2018),
# dr_2019 = ifelse(Names == "Nkhamenya Rural Hospital", 533 + 4004, dr_2019),
# dr_2020 = ifelse(Names == "Nkhamenya Rural Hospital", 3587 + 5929, dr_2020),
# dr_2017 = ifelse(Names == "Mziza Health Centre", 3863 + 3489, dr_2017),
# dr_2018 = ifelse(Names == "Mziza Health Centre", 2815 + 1804, dr_2018),
# dr_2019 = ifelse(Names == "Mziza Health Centre", 2815 + 1804, dr_2019),
# dr_2020 = ifelse(Names == "Mziza Health Centre", 2397 + 6194, dr_2020))
zipatala_aggregated <- dry_season_malaria_2017_2020[complete.cases(dry_season_malaria_2017_2020),] %>%
dplyr::filter(Names != "Kasalika Health Centre",
Names != "Bua Health Centre",
Names != "Kaluluma Rural Hospital")
zipatala_aggregated$dr_2017[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 4528 + 16289
zipatala_aggregated$dr_2018[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 4493 +15821
zipatala_aggregated$dr_2019[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 2729 + 10721
zipatala_aggregated$dr_2020[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 4368 + 24424
zipatala_aggregated$dr_2017[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 2887 + 752
zipatala_aggregated$dr_2018[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 851 + 3689
zipatala_aggregated$dr_2019[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 533 + 4004
zipatala_aggregated$dr_2020[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 3587 + 5929
zipatala_aggregated$dr_2017[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 3863 + 3489
zipatala_aggregated$dr_2018[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 2815 + 1804
zipatala_aggregated$dr_2019[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 2439 + 1740
zipatala_aggregated$dr_2020[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 6194 + 2397
# There are NA values in dry_season_malaria_2017_2020.csv, replace them by 0 and add a column to keep information
# dry_season_malaria_2017_2020$is_na = ifelse(is.na(dry_season_malaria_2017_2020$LONGITU), TRUE, FALSE)
# index = dry_season_malaria_2017_2020$is_na == TRUE
# dry_season_malaria_2017_2020[index, "LONGITU"] <- 0
# dry_season_malaria_2017_2020[index, "LATITUD"] <- 0
#
# dry_season_malaria_2017_2020 <- SpatialPointsDataFrame(coords = dry_season_malaria_2017_2020[, 7:8],
# data = dry_season_malaria_2017_2020[,1:6])
#
health_facility_aggr_sf <- sf::st_as_sf(zipatala_aggregated,
coords = c("LONGITU", "LATITUD"),
crs = 4326, agr = "constant")
# Save as shapefile
# sf::st_write(health_facility_sf, "data/kasungu_zipatala_aggregated.shp", overwrite = TRUE)
# mapview::mapview(health_facility_aggr_sf)
# Set to interactive view
# tmap_mode('view')
old_catchment <- tm_shape(malire)+
tm_polygons()+
tm_shape(health_facility_aggr_sf)+
tm_dots(size = .3, col = "blue", alpha = 0.5)+
tm_text("Names", size = .3, just = "top",
col = "black", remove.overlap = TRUE)+
tm_layout(frame = FALSE,
title = "Old catchment boundaries",
title.size = .8,
title.position = c("left", "top"))+
tm_scale_bar(breaks = c(0, 10, 20),
text.size = .5)
new_catchment <- tm_shape(malire_new)+
tm_polygons()+
tm_shape(health_facility_aggr_sf)+
tm_dots(size = .3, col = "blue", alpha = 0.5)+
tm_text("Names", size = .3, just = "top",
col = "black", remove.overlap = TRUE)+
tm_layout(frame = FALSE,
title = "New catchment boundaries",
title.size = .8,
title.position = c("left", "top"))+
tm_compass(position=c("right", "top"))
tmap::tmap_arrange(old_catchment, new_catchment)
Fig 2. Kasungu Health-care Facilities and Catchment Areas
Using population and health information within each health facility catchment area we produce a choropleth map colored in proportion to a statistical variable that represents an aggregate summary of a geographic characteristic, in this case total population, population density, malaria rate and total malaria cases.
# Take a look at the variables, CRS and geometry type
# head(malire)
# Function to create a choropleth map from sf object
choroplethmap <- function(df, vname = NA, pal = NA, legend.title = NA){
# choropleth map
# df: simple feature polygon layer
# vname: variable name (as character, in quotes)
# pal: color palette
# legend.title: legend title in quotes
# returns:
# a tmap element (plots a map)
tm_shape(df)+
tm_fill(col = vname, style = "quantile",
palette = pal, n = 5, title = legend.title)+
tm_borders()+
tm_layout(legend.position = c("right","bottom"))
}
population <- choroplethmap(malire, vname = "POPULATION",
pal = "Reds", legend.title = "Total population")
pop_density <- choroplethmap(malire, vname = "DENSITY",
pal = "YlOrRd", legend.title = "Population density")
malaria_rate <- choroplethmap(malire, vname = "MALAR_RATE",
pal = "BuGn", legend.title = "Malaria prevalence %")
malaria_cases <- choroplethmap(malire, vname = "MALARIA_CA",
pal = "Purples", legend.title = "Malaria cases")
tmap_arrange(population, pop_density, malaria_rate, malaria_cases)
Fig.3 Population and health information for each health facility catchment area
CRS for kasungu_population layers is already in WGS 84 UTM Zone 36 South, which is the base projected coordinate system for Malawi and has units in meters, hence no need to transform it.The highest estimated population per grid-cell is 7,949 people in 2020.
# Check out the CRS and values of the population layers
kasungu_population_2017
## class : RasterLayer
## dimensions : 150, 128, 19200 (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667 (x, y)
## extent : 491272.7, 609044.2, 8494349, 8632164 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs
## source : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2017_1km_aggregated.tif
## names : ku_pop_2017_1km_aggregated
kasungu_population_2018
## class : RasterLayer
## dimensions : 150, 128, 19200 (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667 (x, y)
## extent : 491272.7, 609044.2, 8494349, 8632164 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs
## source : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2018_1km_aggregated.tif
## names : ku_pop_2018_1km_aggregated
## values : 0, 6253.557 (min, max)
kasungu_population_2019
## class : RasterLayer
## dimensions : 150, 128, 19200 (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667 (x, y)
## extent : 491272.7, 609044.2, 8494349, 8632164 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs
## source : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2019_1km_aggregated.tif
## names : ku_pop_2019_1km_aggregated
## values : 0, 6483.727 (min, max)
kasungu_population_2020
## class : RasterLayer
## dimensions : 150, 128, 19200 (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667 (x, y)
## extent : 491272.7, 609044.2, 8494349, 8632164 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs
## source : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2020_1km_aggregated.tif
## names : ku_pop_2020_1km_aggregated
## values : 0, 7949.033 (min, max)
# Function to create a raster population map
populationmap <- function(df, title){
# population map
# arguments:
# df: aggregated population raster layer
# legend.title: legend title
# returns:
# a tmap-element (plots a map)
tm_shape(df)+
tm_raster(palette = "Reds", title = title,
breaks = c(0,100,200,400,600,800,1000,2000,4000,6000,8000))+
tm_layout(legend.position = c("right", "bottom"),
frame = FALSE)+
tm_scale_bar(position = c("left", "bottom"))
}
# Set to static map
tmap_mode("plot")
pop_2017 <- populationmap(kasungu_population_2017, title = "2017 Population")
pop_2018 <- populationmap(kasungu_population_2018, title = "2018 Population")
pop_2019 <- populationmap(kasungu_population_2019, title = "2019 Population")
pop_2020 <- populationmap(kasungu_population_2020, title = "2020 Population")
tmap_arrange(pop_2017, pop_2018, pop_2019, pop_2020, nrow = 2) # Layout the maps
Fig.4 Estimated total number of people per 1km grid-cell
Buffers of 30m radius have been created around open waterbodies and the geometries of overlapping polygons are unioned together. st_union returns a single geometry sfc object, which is why st_cast and st_as_sf functions have been used to cast and convert the multipolygon buffer geometries to a dissolved or split polygon geometry collection.
surfaceWater_2017 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2017, dist = 30)), "POLYGON"))
surfaceWater_2018 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2018, dist = 30)), "POLYGON"))
surfaceWater_2019 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2019, dist = 30)), "POLYGON"))
surfaceWater_2020 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2020, dist = 30)), "POLYGON"))
waterbody_buffer <- function(waterbody, distance, catchment){
#Buffer the 'water' vector file by 'distance' meters
buffer_radiusk <- st_buffer(waterbody, distance)
# Dissolve the buffers
buffer_radiusk_union <- st_as_sf(st_cast(st_union(buffer_radiusk),"MULTIPOLYGON"))
# Assign Attributes of the 'catchment' to each of the waterbodies.
int_radiusk <- st_intersection(buffer_radiusk_union, catchment)
open_water_buffer <- st_as_sf(int_radiusk)
# Polygons being seen to be in multiple catchments
st_intersects(open_water_buffer, catchment)
# Make the assumption that the attribute is constant throughout the geometry
st_agr(open_water_buffer) = "constant"
st_agr(catchment) = "constant"
return(out = open_water_buffer)
}
st_buffer has been used to compute 1km, 2km and 3km buffers around each waterbody polygon. Then geometry of the buffer features are then combined resulting in resolved internal boundaries. Invalid waterbody polygons can be checked by using st_is_valid which returns by default NA on corrupt geometries.
# For 2017 TropWet surface water polygons
buffer_1km_2017 <- waterbody_buffer(waterbody = surfaceWater_2017,
distance = 1000, catchment = malire_new)
buffer_2km_2017 <- waterbody_buffer(waterbody = surfaceWater_2017,
distance = 2000, catchment = malire_new)
buffer_3km_2017 <- waterbody_buffer(waterbody = surfaceWater_2017,
distance = 3000, catchment = malire_new)
# For 2018 TropWet surface water polygons
buffer_1km_2018 <- waterbody_buffer(waterbody = surfaceWater_2018,
distance = 1000, catchment = malire)
buffer_2km_2018 <- waterbody_buffer(waterbody = surfaceWater_2018,
distance = 2000, catchment = malire)
buffer_3km_2018 <- waterbody_buffer(waterbody = surfaceWater_2018,
distance = 3000, catchment = malire)
# For 2019 TropWet surface water polygons
buffer_1km_2019 <- waterbody_buffer(waterbody = surfaceWater_2019,
distance = 1000, catchment = malire)
buffer_2km_2019 <- waterbody_buffer(waterbody = surfaceWater_2019,
distance = 2000, catchment = malire)
buffer_3km_2019 <- waterbody_buffer(waterbody = surfaceWater_2019,
distance = 3000, catchment = malire)
# For 2020 TropWet surface water polygons
buffer_1km_2020 <- waterbody_buffer(waterbody = surfaceWater_2020,
distance = 1000, catchment = malire)
buffer_2km_2020 <- waterbody_buffer(waterbody = surfaceWater_2020,
distance = 2000, catchment = malire)
buffer_3km_2020 <- waterbody_buffer(waterbody = surfaceWater_2020,
distance = 3000, catchment = malire)
# Map the buffers
buffermap <- function(df, boundary, title = NA){
# function for creating buffer map in ggplot
# arguments:
# df: buffer polygon layer
# boundary: Kasungu district boundary layer
# title: main title
# returns:
# a map-element (plots a map)
ggplot(data = df)+
geom_sf()+
geom_sf(data = boundary, fill = NA)+
theme_classic()+
labs(title = title)
}
# For 2017
buffer1km_2017 <- buffermap(buffer_1km_2017, kasungu_district, title = "2017: 1km Buffers")
buffer2km_2017 <- buffermap(buffer_2km_2017, kasungu_district, title = "2017: 2km Buffers")
buffer3km_2017 <- buffermap(buffer_3km_2017, kasungu_district, title = "2017: 3km Buffers")
# For 2018
buffer1km_2018 <- buffermap(buffer_1km_2018, kasungu_district, title = "2018: 1km Buffers")
buffer2km_2018 <- buffermap(buffer_2km_2018, kasungu_district, title = "2018: 2km Buffers")
buffer3km_2018 <- buffermap(buffer_3km_2018, kasungu_district, title = "2018: 3km Buffers")
# For 2019
buffer1km_2019 <- buffermap(buffer_1km_2019, kasungu_district, title = "2019: 1km Buffers")
buffer2km_2019 <- buffermap(buffer_2km_2019, kasungu_district, title = "2019: 2km Buffers")
buffer3km_2019 <- buffermap(buffer_3km_2019, kasungu_district, title = "2019: 3km Buffers")
# For 2020
buffer1km_2020 <- buffermap(buffer_1km_2020, kasungu_district, title = "2020: 1km Buffers")
buffer2km_2020 <- buffermap(buffer_2km_2020, kasungu_district, title = "2020: 2km Buffers")
buffer3km_2020 <- buffermap(buffer_3km_2020, kasungu_district, title = "2020: 3km Buffers")
grid.arrange(buffer1km_2017, buffer1km_2018, buffer1km_2019, buffer1km_2020,
buffer2km_2017, buffer2km_2018, buffer2km_2019, buffer2km_2020,
buffer3km_2017, buffer3km_2018, buffer3km_2019, buffer3km_2020, ncol = 4)
Fig 5. Buffers around open waterbodies in Kasungu
Here, we create a function that uses open water bodies, health facility catchment boundary and population datasets to estimate the number of people living within 1km, 2km and 3km buffers surrounding the waterbodies. This involves overlaping and intersecting different data layers (buffers of waterbodies, catchment boundary, population raster, etc), so that we can apportion population from one layer to another. In this model, the layer with population estimate is kasungu_population_* and the target layer that does not have an estimate, but for which we desire one, is kasungu_health_facility_catchment/malire. The function returns an object called finalized that has estimated population within the buffers zones, and multipolygon geometry.
nachulu <- function(water, distance, catchment, raster_population){
# function to estimate population out of raster and vector layers
# arguments:
# water: waterbody polygon layer
# distance: buffer distance in meters
# catchment: health facility catchment boundary layer generated from accessibility mapping
# raster_population: aggregated population raster layer
# returns:
# finalized: sf objects with a data frame containing estimated population
#Buffer the 'water' vector file by 'distance' meters
buffer_radiusk <- st_buffer(water, distance)
# Dissolve the buffers. Unioning geometries dissolves for instance internal polygon
# boundaries, which otherwise would lead to invalid MULTIPOLYGON errors in subsequent analysis.
buffer_radiusk_union <- sf::st_as_sf(st_cast(st_union(buffer_radiusk),"POLYGON"))
# Split the buffered water file by the boundaries of the catchment area.
# We don't want to allocate the attributes in this step
int_radiusk <- st_intersection(buffer_radiusk_union, st_geometry(catchment))
water_int_radiusk <- sf::st_as_sf(int_radiusk)
# Convert the MULTIPOLYGON object into several POLYGON objects
water_int_radiusk <- st_cast(st_buffer(water_int_radiusk,0.0), "MULTIPOLYGON") %>%
st_cast("POLYGON")
# Polygons being seen to be in multiple catchments
st_intersects(water_int_radiusk, catchment)
# Estimation of population within X kilometer buffer.
# Extracting zonal statistics from a population raster layer.
# The population raster is a continuous gridded surface layer that assign an
# estimated population density value to every square in their grid.
# The population statistics are then summed and apportioned to the buffer polygons
water_int_radiusk$pop_est<- raster::extract(raster_population, water_int_radiusk,
fun = sum, na.rm = TRUE)
# Make the assumption that the attribute is constant throughout the geometry
st_agr(water_int_radiusk) = "constant"
st_agr(catchment) = "constant"
# Find which catchment each polygon belongs to using its centroid - a point dataset
# representing the geographic center-points of the polygons
assign_catchment <- st_intersection(st_centroid(water_int_radiusk), catchment)
# Calculated total population living X distance for each facility
# Notice that the assign_catchment is comprised of separate POLYGONS (assign_catchment$x).
# The first step is to “dissolve” away these POLYGONS into one MULTIPOLYGON.
# There is no sf equivalent to the ArcMap “dissolve” operation.
# Instead we use a combination of group_by and summarize from the dplyr package.
# Stats::aggregate from sf package, and dplyr::summarize both do essentially the same.
npeople <- assign_catchment %>% dplyr::group_by(DN) %>%
summarize(pop_estimate = sum(pop_est, na.rm = TRUE))
finalized <- merge(catchment, st_drop_geometry(npeople), by='DN', all.x = TRUE)
return(out=finalized)
}
Using the nachulu function, here we estimate the number of people surrounding waterbodies in each health facility catchment area using attributes from the open waterbody buffers, health facility catchment boundary and aggregated population raster layers. That is, population living in various distances from open water bodies e.g. 1km, 2km and 3 km is estimated and assigned to health facilities.
# For 2017 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2017<- nachulu(water = surfaceWater_2017, distance = 1000,
catchment = malire_new,
raster_population = kasungu_population_2017)
run1k_2017$pop_1km <- run1k_2017$pop_estimate
# Estimate population living within 2km radius from water bodies
run2k_2017<- nachulu(water = surfaceWater_2017, distance = 2000,
catchment = malire_new,
raster_population = kasungu_population_2017)
run2k_2017$pop_2km <- run2k_2017$pop_estimate
# Estimate population living within 3km radius from water bodies
run3k_2017<- nachulu(water = surfaceWater_2017, distance = 3000,
catchment = malire_new,
raster_population = kasungu_population_2017)
run3k_2017$pop_3km <- run3k_2017$pop_estimate
# For 2018 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2018<- nachulu(water = surfaceWater_2018, distance = 1000,
catchment = malire_new,
raster_population = kasungu_population_2018)
run1k_2018$pop_1km <- run1k_2018$pop_estimate
# Estimate population living within 2km radius from water bodies
run2k_2018<- nachulu(water = surfaceWater_2018, distance = 2000,
catchment = malire_new,
raster_population = kasungu_population_2018)
run2k_2018$pop_2km <- run2k_2018$pop_estimate
# Estimate population living within 3km radius from water bodies
run3k_2018 <- nachulu(water = surfaceWater_2018, distance = 3000,
catchment = malire_new,
raster_population = kasungu_population_2018)
run3k_2018$pop_3km <- run3k_2018$pop_estimate
# For 2019 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2019 <- nachulu(water = surfaceWater_2019, distance = 1000,
catchment = malire_new,
raster_population = kasungu_population_2019)
run1k_2019$pop_1km <- run1k_2019$pop_estimate
# Estimate population living within 2km radius from water bodies
run2k_2019<- nachulu(water = surfaceWater_2019, distance = 2000,
catchment = malire_new,
raster_population = kasungu_population_2019)
run2k_2019$pop_2km <- run2k_2019$pop_estimate
# Estimate population living within 3km radius from water bodies
run3k_2019<- nachulu(water = surfaceWater_2019, distance = 3000,
catchment = malire_new,
raster_population = kasungu_population_2019)
run3k_2019$pop_3km <- run3k_2019$pop_estimate
# For 2020 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2020 <- nachulu(water = surfaceWater_2020, distance = 1000,
catchment = malire_new,
raster_population = kasungu_population_2020)
run1k_2020$pop_1km <- run1k_2020$pop_estimate
# Estimate population living within 2km radius from water bodies
run2k_2020 <- nachulu(water = surfaceWater_2020, distance = 2000,
catchment = malire_new,
raster_population = kasungu_population_2020)
run2k_2020$pop_2km <- run2k_2020$pop_estimate
# Estimate population living within 3km radius from water bodies
run3k_2020 <- nachulu(water = surfaceWater_2020, distance = 3000, catchment = malire_new,
raster_population = kasungu_population_2020)
run3k_2020$pop_3km <- run3k_2020$pop_estimate
Map the outputs from the nachulu function: layers of polygons representing health facility catchment areas, with a field in the attribute table containing the estimated catchment population pop_estimate in 2017, 2018, 2019 and 2020. In areas where the input data is out of data, e.g, no presence of waterbody polygons, the estimated population is missing.
# Function to create maps of estimated population out of sf objects from the nachulu function
estimatedpop <- function(df, vname = "pop_estimate", title, legend.title = NA){
# estimated population map
# df: estimated population layer from nachulu function
# vname: variable name (as character, in qoutes)
# title: map title in quotes
# legend.title: legend title in qoutes
# returns:
# a tmap-element (plots a map)
tm_shape(df)+
tm_fill(col = vname, style = "quantile",
palette = "Reds", n = 5, title = legend.title)+
tm_borders()+
tm_layout(legend.position = c(0.7,"bottom"),
title.size = .3,
frame = FALSE)+
tm_credits(title, position = c(0,0.75), size = 1)
}
run1k_2017_map <- estimatedpop(run1k_2017, title = "2017: 1km buffers",
legend.title = "Estimated \n population")
run2k_2017_map <- estimatedpop(run2k_2017, title = "2017: 2km buffers",
legend.title = "Estimated \n population")
run3k_2017_map <- estimatedpop(run3k_2017, title = "2017: 3km buffers",
legend.title = "Estimated \n population")
run1k_2018_map <- estimatedpop(run1k_2018, title = "2018: 1km buffers",
legend.title = "Estimated \n population")
run2k_2018_map <- estimatedpop(run2k_2018, title = "2018: 2km buffers",
legend.title = "Estimated \n population")
run3k_2018_map <- estimatedpop(run3k_2018, title = "2018: 3km buffers",
legend.title = "Estimated \n population")
run1k_2019_map <- estimatedpop(run1k_2019, title = "2019: 1km buffers",
legend.title = "Estimated \n population")
run2k_2019_map <- estimatedpop(run2k_2019, title = "2019: 2km buffers",
legend.title = "Estimated \n population")
run3k_2019_map <- estimatedpop(run3k_2019, title = "2019: 3km buffers",
legend.title = "Estimated \n population")
run1k_2020_map <- estimatedpop(run1k_2020, title = "2020: 1km buffers",
legend.title = "Estimated \n population")
run2k_2020_map <- estimatedpop(run2k_2020, title = "2020: 2km buffers",
legend.title = "Estimated \n population")
run3k_2020_map <- estimatedpop(run3k_2020, title = "2020: 3km buffers",
legend.title = "Estimated \n population")
tmap_arrange(run1k_2017_map, run2k_2017_map, run3k_2017_map,
run1k_2018_map, run2k_2018_map, run3k_2018_map,
run1k_2019_map, run2k_2019_map, run3k_2019_map,
run1k_2020_map, run2k_2020_map, run3k_2020_map, ncol = 3)
Fig 6. Estimated population in Kasungu health facility catchments
First, we need to convert the sf outputs from the nachulu function to a plain data frame with as.data.frame, which drops the geometry and gives back a plain data frame
# Convert sf objects to plain data frame
# 2017 -------------------------------------------------------------------------
run1k_2017_df <- as.data.frame(run1k_2017)[,c("DN", "pop_1km")]
run2k_2017_df <- as.data.frame(run2k_2017)[,c("DN", "pop_2km")]
run3k_2017_df <- as.data.frame(run3k_2017)[,c("DN", "pop_3km")]
# 2018 -------------------------------------------------------------------------
run1k_2018_df <- as.data.frame(run1k_2018)[,c("DN", "pop_1km")]
run2k_2018_df <- as.data.frame(run2k_2018)[,c("DN", "pop_2km")]
run3k_2018_df <- as.data.frame(run3k_2018)[,c("DN", "pop_3km")]
# 2019 -------------------------------------------------------------------------
run1k_2019_df <- as.data.frame(run1k_2019)[,c("DN", "pop_1km")]
run2k_2019_df <- as.data.frame(run2k_2019)[,c("DN", "pop_2km")]
run3k_2019_df <- as.data.frame(run3k_2019)[,c("DN", "pop_3km")]
# 2020 -------------------------------------------------------------------------
run1k_2020_df <- as.data.frame(run1k_2020)[,c("DN", "pop_1km")]
run2k_2020_df <- as.data.frame(run2k_2020)[,c("DN", "pop_2km")]
run3k_2020_df <- as.data.frame(run3k_2020)[,c("DN", "pop_3km")]
# Merge the data frames. NB: one important variable 'dry season malaria cases'
# is yet to be added to the data frames
finalized_2017_df <- cbind.data.frame(run1k_2017_df, run2k_2017_df, run3k_2017_df)
finalized_2018_df <- cbind.data.frame(run1k_2018_df, run2k_2018_df, run3k_2018_df)
finalized_2019_df <- cbind.data.frame(run1k_2019_df, run2k_2019_df, run3k_2019_df)
finalized_2020_df <- cbind.data.frame(run1k_2020_df, run2k_2020_df, run3k_2020_df)
# Exclude unnecessary columns and rows
# 2017 -------------------------------------------------------------------------
finalized_2017_df <- finalized_2017_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")]
finalized_2017_df <- cbind("Fid" = rownames(finalized_2017_df), finalized_2017_df)
# 2018 -------------------------------------------------------------------------
finalized_2018_df <- finalized_2018_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")]
finalized_2018_df <- cbind("Fid" = rownames(finalized_2018_df), finalized_2018_df)
# 2019 -------------------------------------------------------------------------
finalized_2019_df <- finalized_2019_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")]
finalized_2019_df <- cbind("Fid" = rownames(finalized_2019_df), finalized_2019_df)
# 2020 -------------------------------------------------------------------------
finalized_2020_df <- finalized_2020_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")]
finalized_2020_df <- cbind("Fid" = rownames(finalized_2020_df), finalized_2020_df)
The estimated population within 1km, 2km and 3km buffers around water bodies is now being merged with the 2017 to 2020 dry season malaria dataframe using a user-defined function: organize
# Create a function that merges and tidy the population and malaria dataframes
organize <- function(malaria_data, pop_data){
# function to merge, tidy dry season malaria and estimated population datasets
# arguments:
# malaria_data: sf point object with a data frame containing aggregated
# malaria data at health facilities level
# pop_data: sf polygon object with a data frame containing the estimated population
# returns:
# hfc_malaria_pop: sf objects with a data frame containing dry season malaria and estimated population
# Convert sf objects to spatial
estimated_pop_shp <- as(pop_data, "Spatial")
malaria_shp <- as(malaria_data, "Spatial")
# Match CRS
malaria_shp <- spTransform(malaria_shp, crs(estimated_pop_shp))
# Overlay aggregated health facility points and extract estimated population
# Using 'point.in.poly' to return a point spatial object, in this case location of health facilities
# and estimated population instead of sp::over function, which simply returns
# a data frame, with the same no. rows.
# Argument 'sp = TRUE' returns an sp class object, else returns sf class object
health_facilities_pop <- spatialEco::point.in.poly(malaria_shp, estimated_pop_shp, sp = TRUE)
# head(health_facilities_pop@data)
# Add the extracted ID, health facility names, dry season malaria cases and estimated population to
# the health facility catchments (hfc)
hfc_pop_malaria_shp <- merge(estimated_pop_shp, health_facilities_pop,
by.x = "FID", by.y = "DN")
# Convert the merged shapefile containing estimated population and malaria data to sf-object
hfc_pop_malaria_sf <- sf::st_as_sf(hfc_pop_malaria_shp)
# Tidy the data by reordering columns and dropping those not needed
# First, get column names
# colnames(hfc_pop_malaria_sf)
# [1] "DN" "pop_1km" "pop_estimate" "FID" "Names" "dr_2017"
# [7] "dr_2018" "dr_2019" "dr_2020" "coords.x1" "coords.x2" "geometry"
hfc_pop_malaria_sf_trim <- hfc_pop_malaria_sf %>%
dplyr::select(-c(pop_estimate, coords.x1, coords.x2))
# Reorder columns by position
hfc_malaria_pop_sf <- hfc_pop_malaria_sf_trim[, c(4, 3, 1, 2, 5, 6, 7, 8, 9)]
return(out = hfc_malaria_pop_sf)
}
# Invoking the function
# 2017 data -------------------------------------------------------------------------------
hfc_1km_2017_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2017)
hfc_2km_2017_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2017)
hfc_3km_2017_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2017)
# 2018 data -------------------------------------------------------------------------------
hfc_1km_2018_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2018)
hfc_2km_2018_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2018)
hfc_3km_2018_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2018)
# 2019 data -------------------------------------------------------------------------------
hfc_1km_2019_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2019)
hfc_2km_2019_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2019)
hfc_3km_2019_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2019)
# 20 20 data ------------------------------------------------------------------------------
hfc_1km_2020_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2020)
hfc_2km_2020_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2020)
hfc_3km_2020_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2020)
# Check if merging and tidying worked
# Create a user-defined function that maps the merged sf objects
malaria_map <- function(df, year = NA, title, legend.title = NA){
# dry season malaria cases map
# df: sf objects from 'organize' function
# vname: variable name (as character, in qoutes)
# title: map title in quotes
# legend.title: legend title in qoutes
# returns:
# a tmap-element (plots a map)
tm_shape(df)+
tm_fill(col = year, palette = "Oranges", n = 5, title = legend.title,
breaks = c(400, 4000, 10000, 16000, 20000, 30000 ))+
tm_borders("burlywood")+
tm_layout(legend.position = c(0.6, "bottom"),
legend.text.size = 0.6,
legend.title.size = 0.8,
title.size = .3,
frame = FALSE)+
tm_credits(title, position = c(0.2, 0.8), size = .9)+
tm_view(view.legend.position = c("right", "bottom"))
}
dry_season_malaria_2017 <- malaria_map(hfc_1km_2017_sf, year = "dr_2017", title = "2017",
legend.title = "2017 malaria cases")
dry_season_malaria_2018 <- malaria_map(hfc_1km_2018_sf, year = "dr_2018", title = "2018",
legend.title = "2018 malaria cases")
dry_season_malaria_2019 <- malaria_map(hfc_1km_2019_sf, year = "dr_2019", title = "2019",
legend.title = "2019 malaria cases")
dry_season_malaria_2020 <- malaria_map(hfc_1km_2020_sf, year = "dr_2020", title = "2020",
legend.title = "2020 malaria cases")
tmap::tmap_arrange(dry_season_malaria_2017, dry_season_malaria_2018,
dry_season_malaria_2019, dry_season_malaria_2020, ncol = 2)
Fig. 7: Dry season malaria cases, Kasungu
dry_season_malaria <- as(hfc_1km_2017_sf, "Spatial")
ggplot2::ggplot(reshape2::melt(dry_season_malaria@data[, c("dr_2017", "dr_2018", "dr_2019", "dr_2020")]),
aes(variable, value)) +
geom_boxplot() +
xlab("Years") +
ylab("Malaria cases") +
theme_classic()
Fig. 8: Dry season malaria cases from 2017 - 2020 in Kasungu
# 2017 dataframes --------------------------------------------------------------
hfc_1km_2017_df <- hfc_1km_2017_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(Names, FID, pop_1km_2017 = `pop_1km`,
dr_2017, dr_2018, dr_2019, dr_2020)
hfc_2km_2017_df <- hfc_2km_2017_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID,pop_2km_2017 = `pop_2km`)
hfc_3km_2017_df <- hfc_3km_2017_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2017 = `pop_3km`)
# 2018 dataframes --------------------------------------------------------------
hfc_1km_2018_df <- hfc_1km_2018_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_1km_2018 = `pop_1km`)
hfc_2km_2018_df <- hfc_2km_2018_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2018 = `pop_2km`)
hfc_3km_2018_df <- hfc_3km_2018_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2018 = `pop_3km`)
# 2019 dataframes --------------------------------------------------------------
hfc_1km_2019_df <- hfc_1km_2019_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_1km_2019 = `pop_1km`)
hfc_2km_2019_df <- hfc_2km_2019_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2019 = `pop_2km`)
hfc_3km_2019_df <- hfc_3km_2019_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2019 = `pop_3km`)
# 2020 dataframes --------------------------------------------------------------
hfc_1km_2020_df <- hfc_1km_2020_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_1km_2020 = `pop_1km`)
hfc_2km_2020_df <- hfc_2km_2020_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2020 = `pop_2km`)
hfc_3km_2020_df <- hfc_3km_2020_sf %>% unclass %>%
dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2020 = `pop_3km`)
# Bind the dataframes and create new columns containing malaria incidence rate for each year
# The malaria proportion (mProp) is the number of cases of malaria divided by the
# total population (Pop).
kasungu_model_df <- cbind(hfc_1km_2017_df, hfc_2km_2017_df, hfc_3km_2017_df,
hfc_1km_2018_df, hfc_2km_2018_df, hfc_3km_2018_df,
hfc_1km_2019_df, hfc_2km_2019_df, hfc_3km_2019_df,
hfc_1km_2020_df, hfc_2km_2020_df, hfc_3km_2020_df) %>%
dplyr::select(-FID) %>%
dplyr::rowwise() %>%
dplyr::mutate(mProp_1km2017 = round(dr_2017/pop_1km_2017, 1),
mProp_2km2017 = round(dr_2017/pop_2km_2017, 1),
mProp_3km2017 = round(dr_2017/pop_3km_2017, 1))
After knitting this R markdown, run the Kasungu_model.R to fit a glm to the covariates: estimated population around water bodies and dry season malaria cases
write.csv(kasungu_model_df, file = "data/Kasungu_hfc_malaria_pop_2017_2020.csv")